home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Atari Mega Archive 1
/
Atari Mega Archive - Volume 1.iso
/
mint
/
utils
/
agrepsrc.zoo
/
main.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-01-19
|
36KB
|
1,032 lines
/* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
#include "agrep.h"
#include "checkfile.h"
unsigned Mask[MAXSYM];
unsigned Init1, NO_ERR_MASK, Init[MaxError];
unsigned Bit[WORD+1];
CHAR buffer[BlockSize+Maxline+1];
unsigned Next[MaxNext], Next1[MaxNext];
unsigned wildmask, endposition, D_endpos;
int REGEX, RE_ERR, FNAME, WHOLELINE, SIMPLEPATTERN;
int COUNT, HEAD, TAIL, LINENUM, INVERSE, I, S, DD, AND, SGREP, JUMP;
int Num_Pat, PSIZE, num_of_matched, SILENT, BESTMATCH, NOUPPER;
int NOMATCH, TRUNCATE, FIRST_IN_RE, FIRSTOUTPUT;
int WORDBOUND, DELIMITER, D_length;
int EATFIRST, OUTTAIL;
int FILEOUT;
int DNA = 0;
int APPROX = 0;
int PAT_FILE = 0;
int CONSTANT = 0;
int total_line = 0; /* used in mgrep */
CHAR **Textfiles; /* array of filenames to be searched */
CHAR old_D_pat[MaxDelimit] = "\n"; /* to hold original D_pattern */
CHAR CurrentFileName[MAXNAME];
CHAR Progname[MAXNAME];
CHAR D_pattern[MaxDelimit] = "\n; "; /* string which delimits records --
defaults to newline */
int NOFILENAME = 0, /* Boolean flag, set for -h option */
FILENAMEONLY = 0,/* Boolean flag, set for -l option */
Numfiles = 0; /* indicates how many files in Textfiles */
extern int init();
int table[WORD][WORD];
/*
 * initial_value -- put the global option flags and the Bit/Mask tables
 * into their start-up state.
 *
 * Takes no arguments and returns nothing meaningful; it only writes
 * file-scope variables.
 */
initial_value()
{
	int pos;
	int sym;

	/* Flags that start out cleared. */
	JUMP = 0;
	REGEX = 0;
	FNAME = 0;
	BESTMATCH = 0;
	NOUPPER = 0;
	COUNT = 0;
	LINENUM = 0;
	WHOLELINE = 0;
	SGREP = 0;
	EATFIRST = 0;
	INVERSE = 0;
	AND = 0;
	TRUNCATE = 0;
	OUTTAIL = 0;
	SILENT = 0;
	Num_Pat = 0;
	PSIZE = 0;
	SIMPLEPATTERN = 0;
	num_of_matched = 0;
	WORDBOUND = 0;
	DELIMITER = 0;
	RE_ERR = 0;

	/* Flags that start out switched on. */
	FIRST_IN_RE = ON;
	NOMATCH = ON;
	FIRSTOUTPUT = ON;
	HEAD = ON;
	TAIL = ON;

	/* NOTE(review): presumably the insertion/deletion/substitution costs -- confirm. */
	I = 1;
	DD = 1;
	S = 1;

	D_length = 2;

	/*
	 * Bit[WORD] is the lowest bit; every step towards index 1 doubles it.
	 * Bit[0] is not written here.
	 */
	Bit[WORD] = 1;
	for (pos = WORD; pos > 1; pos--) {
		Bit[pos - 1] = Bit[pos] << 1;
	}

	/* No symbol has any mask bits yet. */
	sym = MAXSYM;
	while (sym-- > 0) {
		Mask[sym] = 0;
	}
}
/*
 * compute_next -- fill the transition lookup tables Next (and, for longer
 * patterns, Next1) from the global successor table `table`.
 *
 *   M      number of states/positions of the pattern
 *   Next   output table; entries [k, 2k) are written, where k = 2^M for
 *          M <= SHORTREG and k = 2^((M+1)/2) otherwise
 *   Next1  output table; entries [0, k) are written only when M > SHORTREG
 *
 * Always returns 0.  If M is above SHORTREG and also above MAXREG the
 * function prints "regular expression too long" and terminates the program
 * with status 2 (as re1() does), rather than continuing and writing past
 * the tables.
 */
int compute_next(M, Next, Next1)
int M; unsigned *Next, *Next1;
{
	int i, j, n, k;
	int mid, pp;
	int MM, base;
	unsigned saved_bit;
	unsigned V[WORD];

	/* Reject oversized patterns before M is used as an index anywhere. */
	if (M > SHORTREG && M > MAXREG) {
		fprintf(stderr, "%s: regular expression too long\n", Progname);
		exit(2);
	}

	base = WORD - M;

	/*
	 * Bit[base] is blanked while V[] is built and restored afterwards.
	 * NOTE(review): only positive table entries are used as offsets, so
	 * Bit[base] itself is never indexed below; this looks vestigial.
	 */
	saved_bit = Bit[base];
	Bit[base] = 0;

	for (i = 0; i < WORD; i++) V[i] = 0;

	/*
	 * V[i] = OR of Bit[base + t] for each leading positive entry t of
	 * table[i]; at most 10 entries per row are examined.  The bound is
	 * tested before the row is read.
	 */
	for (i = 1; i < M; i++) {
		for (j = 0; j < 10 && table[i][j] > 0; j++) {
			V[i] = V[i] | Bit[base + table[i][j]];
		}
	}
	Bit[base] = saved_bit;

	if (M <= SHORTREG) {
		/* Short pattern: one table indexed by the whole state word. */
		k = exponen(M);
		pp = 2 * k;
		for (i = k; i < pp; i++) {
			n = i;
			Next[i] = (k >> 1);
			/* Successive low bits of i select V[M], V[M-1], ..., V[1]. */
			for (j = M; j >= 1; j--) {
				if (n & Bit[WORD]) Next[i] = Next[i] | V[j];
				n = (n >> 1);
			}
		}
		return 0;
	}

	/* Long pattern: split into two half-width tables. */
	MM = M;
	if (M & 1) M = M + 1;          /* round up so M/2 covers the upper half */
	k = exponen(M / 2);
	pp = 2 * k;
	mid = MM / 2;
	for (i = k; i < pp; i++) {
		/* Next: successive low bits of i select V[MM-mid] down to V[1]. */
		n = i;
		Next[i] = (Bit[base] >> 1);
		for (j = MM; j > mid; j--) {
			if (n & Bit[WORD]) Next[i] = Next[i] | V[j - mid];
			n = (n >> 1);
		}

		/* Next1: successive low bits of (i-k) select V[MM], V[MM-1], ... */
		n = i - k;
		Next1[i - k] = 0;
		for (j = 0; j < mid; j++) {
			if (n & Bit[WORD]) Next1[i - k] = Next1[i - k] | V[MM - j];
			n = (n >> 1);
		}
	}
	return 0;
}
/*
 * exponen -- return 2 raised to the power m.
 *
 *   m   exponent; values <= 0 yield 1.
 *
 * The doubling is done in unsigned arithmetic so that an m at or beyond
 * the width of int wraps instead of invoking signed-overflow undefined
 * behaviour; for every m whose result fits in an int the value is exactly
 * 2^m, as before.
 */
int exponen(m)
int m;
{
	unsigned power = 1u;
	int step;

	for (step = 0; step < m; step++) {
		power <<= 1;
	}
	return (int) power;
}
re1(Text, M, D)
int Text, M, D;
{
register unsigned i, c, r0, r1, r2, r3, CMask, Newline, Init0, r_NO_ERR;
register unsigned end;
register unsigned hh, LL=0, k; /* Lower part */
int FIRST_TIME=ON, num_read , j=0, base;
unsigned A[MaxRerror+1], B[MaxRerror+1];
unsigned Next[MaxNext], Next1[MaxNext];
CHAR buffer[BlockSize+Maxline+1];
int FIRST_LOOP = 1;
r_NO_ERR = NO_ERR_MASK;
if(M > 30) {
fprintf(stderr, "%s: regular expression too long\n", Progname);
exit(2);
}
base = WORD - M;
hh = M/2;
for(i=WORD, j=0; j < hh ; i--, j++) LL = LL | Bit[i];
if(FIRST_IN_RE) compute_next(M, Next, Next1);
/*SUN: try: change to memory allocation */
FIRST_IN_RE = 0;
Newline = '\n';
Init[0] = Bit[base];
if(HEAD) Init[0] = Init[0] | Bit[base+1];
for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]>>hh] | Next1[Init[i-1]&LL];
Init1 = Init[0] | 1;
Init0 = Init[0];
r2 = r3 = Init[0];
for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; }
if ( D == 0 )
{
while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
{
i=Maxline; end = num_read + Maxline;
if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
if(FIRST_LOOP) { /* if first time in the loop add a newline */
buffer[i-1] = '\n'; /* in front the text. */
i--;
FIRST_LOOP = 0;
}
while ( i < end )
{
c = buffer[i++];
CMask = Mask[c];
if(c != Newline)
{ if(CMask != 0) {
r1 = Init1 & r3;
r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;
}
else {
r2 = r3 & Init1;
}
}
else { j++;
r1 = Init1 & r3; /* match against endofline */
r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;
if(TAIL) r2 = (Next[r2>>hh] | Next1[r2&LL]) | r2; /* epsilon move */
if(( r2 & 1 ) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i-1, end, j);
}
r3 = Init0;
r2 = (Next[r3>>hh] | Next1[r3&LL]) & CMask | Init0;
/* match begin of line */
}
c = buffer[i++];
CMask = Mask[c];
if(c != Newline)
{ if(CMask != 0) {
r1 = Init1 & r2;
r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
}
else r3 = r2 & Init1;
} /* if(NOT Newline) */
else { j++;
r1 = Init1 & r2; /* match against endofline */
r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
if(TAIL) r3 = ( Next[r3>>hh] | Next1[r3&LL] ) | r3;
/* epsilon move */
if(( r3 & 1 ) ^ INVERSE) {
if(FILENAMEONLY) {
num_of_matched++;
printf("%s\n", CurrentFileName);
return;
}
r_output(buffer, i-1, end, j);
}
r2 = Init0;
r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | Init0;
/* match begin of line */
}
} /* while i < end ... */
strncpy(buffer, buffer+num_read, Maxline);
} /* end while read()... */
return;
} /* end if (D == 0) */
while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
{
i=Maxline; end = Maxline + num_read;
if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
if(FIRST_TIME) { /* if first time in the loop add a newline */
buffer[i-1] = '\n'; /* in front the text. */
i--;
FIRST_TIME = 0;
}
while (i < end )
{
c = buffer[i];
CMask = Mask[c];
if(c != Newline)
{
if(CMask != 0) {
r2 = B[0];
r1 = Init1 & r2;
A[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
r3 = B[1];
r1 = Init1 & r3;
r0 = r2 | A[0]; /* A[0] | B[0] */
A[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | (( r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
if(D == 1) goto Nextchar;
r2 = B[2];
r1 = Init1 & r2;
r0 = r3 | A[1];
A[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) |